import pandas as pd
# Load the monthly coffee price series; per the displayed frame it has the
# columns date, cents_per_lb and regime.
fp = "../data/regimed_coffee_prices.csv"
df = pd.read_csv(fp)
# Bare expression: shows the frame as the notebook cell output.
df
date cents_per_lb regime
0 1990-01-01 75.830 R-1
1 1990-02-01 84.010 R-1
2 1990-03-01 93.960 R-1
3 1990-04-01 93.730 R-1
4 1990-05-01 92.020 R-1
... ... ... ...
417 2024-10-01 276.777 R-8
418 2024-11-01 304.953 R-8
419 2024-12-01 344.119 R-8
420 2025-01-01 353.933 R-8
421 2025-02-01 409.516 R-8

422 rows × 3 columns

# Distinct regime labels, in order of first appearance in the data.
regimes = df["regime"].unique().tolist()

# Bucket labels for the three quantile bins, lowest to highest.
price_labels = ["L", "M", "H"]

for regime in regimes:
    # Raw prices of the rows that belong to this regime only.
    regime_prices = df.loc[df["regime"] == regime, "cents_per_lb"]
    # Terciles are computed within the regime, so "L"/"M"/"H" are relative to
    # that regime's own price distribution; the labels are written back onto
    # the matching rows of the full frame via the shared index.
    df.loc[regime_prices.index, "price"] = pd.qcut(
        regime_prices, 3, labels=price_labels
    )

    

    
# For every row, record the discretized price of the preceding row within the
# SAME regime. The first row of each regime has no predecessor and is left
# as NaN.
#
# A grouped shift replaces the earlier row-by-row loop that looked up
# `df.loc[ri - 1, "price"]`: that lookup relied on a default 0..n-1 integer
# index with each regime stored as one contiguous run, and would otherwise
# raise KeyError or pick up a label from a different regime.
#
# The labels are cast to plain objects before shifting so that
# "previous_price" holds ordinary label values (with NaN for the gaps), as the
# per-row scalar assignment did.
df["previous_price"] = (
    df["price"].astype(object).groupby(df["regime"], sort=False).shift(1)
)

# Sanity check: the number of missing values should equal the number of
# regimes (one leading row per regime).
df["previous_price"].isna().sum()
8
# Persist the frame, which by now also carries the "price" and
# "previous_price" columns, without writing the index.
# NOTE(review): this is the same path the data was read from, so the source
# CSV is overwritten in place — confirm that is intended.
fp = "../data/regimed_coffee_prices.csv"
df.to_csv(fp, index=False)
# Build one row-normalized price/previous-price table per regime, keep it in
# memory keyed by regime label, and write each one to its own CSV file.
matrix_dict = {}
for regime in regimes:
    regime_rows = df.loc[df["regime"] == regime]
    # Count co-occurrences: rows are the current price label, columns are the
    # previous price label. Rows with a missing previous price are not counted.
    pair_counts = pd.crosstab(regime_rows["price"], regime_rows["previous_price"])
    # Divide every row by its own total so each row sums to 1 (up to rounding).
    # NOTE(review): with this layout each row is conditioned on the CURRENT
    # price; a conventional transition matrix conditions on the previous
    # state — confirm the orientation is the intended one.
    row_totals = pair_counts.sum(axis=1)
    df_sm = pair_counts.div(row_totals, axis=0).round(3)
    matrix_dict[regime] = df_sm
    fp = "../data/stochastic_matrix_coffee_price-regime-" + regime + ".csv"
    df_sm.to_csv(fp, index=True)

# Display the matrix for regime R-5 as the cell output.
matrix_dict["R-5"]
previous_price H L M
price
L 0.000 0.778 0.222
M 0.111 0.278 0.611
H 0.833 0.000 0.167
import plotly.express as px

# Draw the R-5 matrix as a heatmap with each cell annotated by its value.
fig = px.imshow(matrix_dict["R-5"], text_auto=True)
fig.update_layout(
    # The data is partitioned by regime (keys "R-1".."R-8"), so the title
    # refers to a regime, not a region. The title is centered near the top
    # of the figure.
    title={
        'text': "Stochastic Matrix for Regime 5",
        'y':.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})

fig.show()